Load Libraries and set working directory
###
libs<-c('scater','loomR','Seurat','patchwork','SeuratDisk','monocle3','ggpubr','cowplot','ggplot2','ggbeeswarm','clusterProfiler',
'monocle3','limma','edgeR','fitdistrplus','factoextra','ggrepel','tidyverse','pheatmap','reshape2','ComplexHeatmap','survRM2',
"survminer","survival","ggalluvial",'gtsummary','variancePartition','DirichletReg','cowsay','emmeans','forestmodel')
lapply(libs, require, character.only = TRUE) ; rm(libs)
###
setwd("/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/")
### pops
file_717_per_cell_md <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/1110_per_cell_md.rds')
x <- table(file_717_per_cell_md$subcluster_V03072023[file_717_per_cell_md$visit_type %in% 'baseline_diagnosis'],
file_717_per_cell_md$sample_id[file_717_per_cell_md$visit_type %in% 'baseline_diagnosis'])
class(x) <- 'matrix' ; x[x>0]<-1 ; x <- rowSums(x) ; x <- sort(x)
sample_size_df <- data.frame(subcluster=names(x),samples=as.numeric(x))
sample_size_df$more_than_median <- sample_size_df$samples > median(sample_size_df$samples)
sample_size_df$more_than_q1 <- sample_size_df$samples > quantile(sample_size_df$samples, probs=c(0.25))
### Compartment
cox_surv_os_compartment_multi <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_compartment_multi.RDS')
cox_surv_pfs_compartment_multi <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_compartment_multi.RDS')
cox_surv_os_compartment_uni <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_compartment_uni.RDS')
cox_surv_pfs_compartment_uni <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_compartment_uni.RDS')
### OS
cox_surv_os_subcluster_comp_multi <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_subcluster_comp_multi.RDS')
cox_surv_os_subcluster_comp_uni <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_subcluster_comp_uni.RDS')
cox_surv_os_subcluster_all_multi <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_subcluster_all_multi.RDS')
cox_surv_os_subcluster_all_uni <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_os_subcluster_all_uni.RDS')
### PFS
cox_surv_pfs_subcluster_comp_multi <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_subcluster_comp_multi.RDS')
cox_surv_pfs_subcluster_comp_uni <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_subcluster_comp_uni.RDS')
cox_surv_pfs_subcluster_all_multi <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_subcluster_all_multi.RDS')
cox_surv_pfs_subcluster_all_uni <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_cox_surv_pfs_subcluster_all_uni.RDS')
tmp_df <- cox_surv_pfs_subcluster_all_multi
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2)
tmp_df$HR[which(tmp_df$HR<0.1)] <- 0.1
for(i in 1:nrow(tmp_df)){tmp_df$fdr[i] <- p.adjust(tmp_df$pval[i],n = length(table(tmp_df$Marker))) }
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_cox_surv_pfs_subcluster_all_multi_summary_pval.pdf',width = 10,height = 8)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() +
geom_hline(yintercept = 1.3,linetype=2,color='red') +
geom_vline(xintercept = 1,linetype=2,color='black') +
scale_x_log10() +
geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
labs(x='PFS\nCox HR',y='PFS\n-Log10(pval)') + theme_classic()
dev.off()
## quartz_off_screen
## 2
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_cox_surv_pfs_subcluster_all_multi_summary_fdr.pdf',width = 10,height = 8)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(fdr)) + geom_point() +
geom_hline(yintercept = 1.3,linetype=2,color='red') +
geom_vline(xintercept = 1,linetype=2,color='black') +
scale_x_log10() +
geom_text_repel(data=tmp_df[which(tmp_df$fdr<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
labs(x='PFS\nCox HR',y='PFS\n-Log10(FDR)') + theme_classic()
dev.off()
## quartz_off_screen
## 2
tmp_df <- cox_surv_pfs_subcluster_comp_multi
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2)
tmp_df$HR[which(tmp_df$HR<0.1)] <- 0.1
for(i in 1:nrow(tmp_df)){tmp_df$fdr[i] <- p.adjust(tmp_df$pval[i],n = length(table(tmp_df$Marker))) }
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_cox_surv_pfs_subcluster_comp_multi_summary_pval.pdf',width = 10,height = 8)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() +
geom_hline(yintercept = 1.3,linetype=2,color='red') +
geom_vline(xintercept = 1,linetype=2,color='black') +
scale_x_log10() +
geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
labs(x='PFS\nCox HR',y='PFS\n-Log10(pval)') + theme_classic()
dev.off()
## quartz_off_screen
## 2
pdf(file = '/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/clinical_cox_surv_pfs_subcluster_comp_multi_summary_fdr.pdf',width = 10,height = 8)
ggplot(data=tmp_df)+aes(x=HR,y=-log10(fdr)) + geom_point() +
geom_hline(yintercept = 1.3,linetype=2,color='red') +
geom_vline(xintercept = 1,linetype=2,color='black') +
scale_x_log10() +
geom_text_repel(data=tmp_df[which(tmp_df$fdr<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
labs(x='PFS\nCox HR',y='PFS\n-Log10(FDR)') + theme_classic()
dev.off()
## quartz_off_screen
## 2
tmp_df <- cox_surv_pfs_subcluster_comp_multi
tmp_df$MarkerV2 <- paste(tmp_df$Marker,tmp_df$label,sep='.')
tmp_df$MarkerV2 <- gsub('d_dx_amm_iss_stage','ISS',tmp_df$MarkerV2)
tmp_df$HR[which(tmp_df$HR<0.1)] <- 0.1
ggplot(data=tmp_df)+aes(x=HR,y=-log10(pval)) + geom_point() +
geom_hline(yintercept = 1.3,linetype=2,color='red') +
geom_vline(xintercept = 1,linetype=2,color='black') +
scale_x_log10() +
geom_text_repel(data=tmp_df[which(tmp_df$pval<0.05),],aes(label=MarkerV2),show.legend = F,force = 10,size=3) +
labs(x='PFS\nCox HR',y='PFS\n-Log10(pval') + theme_classic()

### all
a<-cox_surv_pfs_subcluster_all_multi ; b<-cox_surv_pfs_subcluster_all_uni
x<-list(multi=unique(a$Marker[which(a$pval<0.05 & ! a$label %in% c('d_dx_amm_iss_stage','B2') & ! a$model %in% 'Univariate')]),
uni=unique(b$Marker[which(b$pval<0.05)]))
subcluster_all_sig_markers_pfs <- Reduce(intersect,x)
subcluster_all_sig_markers_pfs
## [1] "BEry.1" "BEry.3" "BEry.9" "BEry.15.1" "Myeloid.2"
## [6] "Myeloid.5" "Myeloid.12" "NkT.1.1" "NkT.2.0" "NkT.6"
## [11] "Plasma.4" "Plasma.15" "Plasma.21"
### comp
a<-cox_surv_pfs_subcluster_comp_multi ; b<-cox_surv_pfs_subcluster_comp_uni
x<-list(multi=unique(a$Marker[which(a$pval<0.05 & ! a$label %in% c('d_dx_amm_iss_stage','B2') & ! a$model %in% 'Univariate')]),
uni=unique(b$Marker[which(b$pval<0.05)]))
subcluster_comp_sig_markers_pfs <- Reduce(intersect,x)
subcluster_comp_sig_markers_pfs
## [1] "NkT.0" "NkT.2.3" "NkT.6" "BEry.1" "BEry.3"
## [6] "BEry.9" "BEry.15.0" "BEry.15.1" "Myeloid.5" "Myeloid.12"
## [11] "Plasma.4" "Plasma.15" "Plasma.21"
### final pfs
x<-list(all=subcluster_all_sig_markers_pfs,comp=subcluster_comp_sig_markers_pfs)
final_list_pfs <- Reduce(intersect,x)
final_list_pfs
## [1] "BEry.1" "BEry.3" "BEry.9" "BEry.15.1" "Myeloid.5"
## [6] "Myeloid.12" "NkT.6" "Plasma.4" "Plasma.15" "Plasma.21"
### all
a<-cox_surv_os_subcluster_all_multi ; b<-cox_surv_os_subcluster_all_uni
x<-list(multi=unique(a$Marker[which(a$pval<0.05 & ! a$label %in% c('d_dx_amm_iss_stage','B2') & ! a$model %in% 'Univariate')]),
uni=unique(b$Marker[which(b$pval<0.05)]))
subcluster_all_sig_markers_os <- Reduce(intersect,x)
subcluster_all_sig_markers_os
## [1] "BEry.1" "BEry.2" "BEry.3" "BEry.16" "Myeloid.2"
## [6] "Myeloid.12" "NkT.1.1" "NkT.1.4" "NkT.2.0" "NkT.2.2"
## [11] "NkT.3.1" "NkT.6" "Plasma.7" "Plasma.21"
### comp
a<-cox_surv_os_subcluster_comp_multi ; b<-cox_surv_os_subcluster_comp_uni
x<-list(multi=unique(a$Marker[which(a$pval<0.05 & ! a$label %in% c('d_dx_amm_iss_stage','B2') & ! a$model %in% 'Univariate')]),
uni=unique(b$Marker[which(b$pval<0.05)]))
subcluster_comp_sig_markers_os <- Reduce(intersect,x)
subcluster_comp_sig_markers_os
## [1] "NkT.1.4" "NkT.3.1" "NkT.6" "NkT.13" "BEry.1"
## [6] "BEry.2" "BEry.7" "BEry.15.0" "BEry.16" "Ery.5"
## [11] "Myeloid.12" "Plasma.4" "Plasma.21"
### final pfs
x<-list(all=subcluster_all_sig_markers_os,comp=subcluster_comp_sig_markers_os)
final_list_os <- Reduce(intersect,x)
Reduce(intersect,list(a=final_list_pfs,b=final_list_os))
## [1] "BEry.1" "Myeloid.12" "NkT.6" "Plasma.21"
final_list_pfs[which(final_list_pfs %in% sample_size_df$subcluster[which(sample_size_df$more_than_median %in% TRUE )])]
## [1] "BEry.1" "BEry.3" "Myeloid.5" "NkT.6"
final_list_pfs[which(final_list_pfs %in% sample_size_df$subcluster[which(sample_size_df$more_than_q1 %in% TRUE )])]
## [1] "BEry.1" "BEry.3" "BEry.9" "Myeloid.5" "NkT.6" "Plasma.4"
final_list_os[which(final_list_os %in% sample_size_df$subcluster[which(sample_size_df$more_than_median %in% TRUE )])]
## [1] "BEry.1" "BEry.2" "NkT.1.4" "NkT.3.1" "NkT.6"
final_list_os[which(final_list_os %in% sample_size_df$subcluster[which(sample_size_df$more_than_q1 %in% TRUE )])]
## [1] "BEry.1" "BEry.2" "BEry.16" "NkT.1.4" "NkT.3.1" "NkT.6"
### clinical
load(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/clinical_metadata_IA22_Feb03_release.RData')
###
cmia22 <- clinical_metadata_IA22_Feb03_release[which(clinical_metadata_IA22_Feb03_release$public_id %in% file_717_per_cell_md$public_id),]
cmia22 <- cmia22[,colnames(cmia22) %in% c('censpfs','ttcpfs','censos','ttcos', 'collection_event','sample_id','d_pt_sex','public_id')]
cmia22 <- unique(cmia22)
###
my_vars <- c('public_id','davies_based_risk','visit_type','siteXbatch','progression_group','d_dx_amm_age','d_dx_amm_iss_stage','collection_event','sample_id')
###
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% my_vars)]
rownames(x) <- NULL ; x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
x$siteXbatch <- NULL
meta_df <- x
meta_df <- merge(meta_df,cmia22,by='public_id',all=TRUE) ; rm(cmia22)
###
ix <- which(meta_df$davies_based_risk %in% c('standard_risk','high_risk') & meta_df$collection_event %in% 'Baseline')
meta_baseline_df <- meta_df[ix,] ; rm(meta_df)
### COMP
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
my_doublets <- unique(as.character(file_717_per_cell_md$subcluster_V03072023[file_717_per_cell_md$doublet_pred %in% c('dblet_cluster','poss_dblet_cluster')]))
y <- y[!rownames(y) %in% my_doublets,]
y <- y[,which(colnames(y) %in% meta_baseline_df$sample_id)]
Compartment <- c('NkT','BEry','Ery','Myeloid','Plasma') #,'Full'
cell_proportions <- list()
for(iii in Compartment){
ix <- grep(paste('^',iii,sep=''), rownames(y))
z <- y[ix,]
cell_proportions[[iii]] = DR_data(t(z))
}
cell_props_comp <- do.call(cbind,cell_proportions)
###
subcluster_comp_fulldf <- cbind(meta_baseline_df,cell_props_comp)
### All populations
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
my_doublets <- unique(as.character(file_717_per_cell_md$subcluster_V03072023[file_717_per_cell_md$doublet_pred %in% c('dblet_cluster','poss_dblet_cluster')]))
y <- y[!rownames(y) %in% my_doublets,]
### remove non baseline/relevant
y <- y[,which(colnames(y) %in% meta_baseline_df$sample_id)]
cell_props_all = DR_data(t(y))
identical(meta_baseline_df$sample_id,rownames(cell_props_all))
## [1] FALSE
cell_props_all <- cell_props_all[match(meta_baseline_df$sample_id,rownames(cell_props_all)),]
identical(meta_baseline_df$sample_id,rownames(cell_props_all))
## [1] TRUE
subcluster_all_fulldf <- cbind(meta_baseline_df,cell_props_all)
### function to label quartiles
get_four_quantiles <- function(p,x){
y <- x[,p] ; z <- x[,p]
z[y < quantile(y, probs=c(0.25))] <- 'Q1'
z[y > quantile(y, probs=c(0.75))] <- 'Q4'
z[y >= quantile(y, probs=c(0.25)) & y <= quantile(y, probs=c(0.5))] <- 'Q2'
z[y <= quantile(y, probs=c(0.75)) & y >= quantile(y, probs=c(0.5))] <- 'Q3'
return(z)
}
### function to label medians
get_median <- function(p,x){
y <- x[,p] ; z <- rep('Low',length(y))
z[y > median(y)] <- 'High'
return(z)
}
#[1] "BEry.1" "BEry.2" "NkT.1.4" "NkT.3.1" "NkT.6" #[1] "BEry.1" "BEry.3" "Myeloid.5" "NkT.6"
tmp_df <- subcluster_comp_fulldf
tmp_df$x <- get_four_quantiles(p = "Myeloid.5",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_all_fulldf
tmp_df$x <- get_four_quantiles(p = "Myeloid.5",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_comp_fulldf
tmp_df$x <- get_four_quantiles(p = "NkT.6",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_all_fulldf
tmp_df$x <- get_four_quantiles(p = "NkT.6",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) ) + labs(title = paste('PFS',sep = ':'))

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_comp_fulldf
tmp_df$x <- get_four_quantiles(p = "NkT.3.1",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_all_fulldf
tmp_df$x <- get_four_quantiles(p = "NkT.3.1",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) ) + labs(title = paste('PFS',sep = ':'))

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_comp_fulldf
tmp_df$x <- get_four_quantiles(p = "NkT.1.4",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_all_fulldf
tmp_df$x <- get_four_quantiles(p = "NkT.1.4",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) ) + labs(title = paste('PFS',sep = ':'))

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_comp_fulldf
tmp_df$x <- get_four_quantiles(p = "BEry.1",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_all_fulldf
tmp_df$x <- get_four_quantiles(p = "BEry.1",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_comp_fulldf
tmp_df$x <- get_four_quantiles(p = "BEry.2",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_all_fulldf
tmp_df$x <- get_four_quantiles(p = "BEry.2",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_comp_fulldf
tmp_df$x <- get_four_quantiles(p = "BEry.3",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

tmp_df <- subcluster_all_fulldf
tmp_df$x <- get_four_quantiles(p = "BEry.3",x = tmp_df)
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age + d_dx_amm_iss_stage + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ x + d_pt_sex + d_dx_amm_age, data = tmp_df) )

library(rms)
### "BEry.1" "BEry.2" "NkT.1.4" "NkT.3.1" "BEry.3" "Myeloid.5" "NkT.6"
tmp_df <- subcluster_all_fulldf
tmp_df$Myeloid.5.Score <- get_median(p = "Myeloid.5",x = tmp_df)
tmp_df$Myeloid.5.Score <- factor(tmp_df$Myeloid.5.Score,levels = c('Low','High'))
tmp_df$NkT.6.Score <- get_median(p = "NkT.6",x = tmp_df)
tmp_df$NkT.6.Score <- factor(tmp_df$NkT.6.Score,levels = c('Low','High'))
tmp_df$NkT.1.4.Score <- get_median(p = "NkT.1.4",x = tmp_df)
tmp_df$NkT.1.4.Score <- factor(tmp_df$NkT.1.4.Score,levels = c('Low','High'))
tmp_df$NkT.3.1.Score <- get_median(p = "NkT.3.1",x = tmp_df)
tmp_df$NkT.3.1.Score <- factor(tmp_df$NkT.3.1.Score,levels = c('Low','High'))
tmp_df$BEry.1.Score <- get_median(p = "BEry.1",x = tmp_df)
tmp_df$BEry.1.Score <- factor(tmp_df$BEry.1.Score,levels = c('Low','High'))
tmp_df$BEry.2.Score <- get_median(p = "BEry.2",x = tmp_df)
tmp_df$BEry.2.Score <- factor(tmp_df$BEry.2.Score,levels = c('Low','High'))
tmp_df$BEry.3.Score <- get_median(p = "BEry.3",x = tmp_df)
tmp_df$BEry.3.Score <- factor(tmp_df$BEry.3.Score,levels = c('Low','High'))
tmp_df$Plasma.4.Score <- get_median(p = "Plasma.4",x = tmp_df)
tmp_df$Plasma.4.Score <- factor(tmp_df$Plasma.4.Score,levels = c('Low','High'))
tmp_df$Cytogenetic.Risk <- as.character(tmp_df$davies_based_risk)
tmp_df$Cytogenetic.Risk[tmp_df$Cytogenetic.Risk %in% 'standard_risk'] <- 'Standard'
tmp_df$Cytogenetic.Risk[tmp_df$Cytogenetic.Risk %in% 'high_risk'] <- 'High'
tmp_df$Cytogenetic.Risk <- factor(tmp_df$Cytogenetic.Risk,levels = c('Standard','High'))
tmp_df$Sex <- factor(tmp_df$d_pt_sex,levels = c('female','male'))
tmp_df$ISS.Stage <- tmp_df$d_dx_amm_iss_stage
tmp_df$Age <- tmp_df$d_dx_amm_age
tmp_ALL_df <- tmp_df
ix<-grep('Score',colnames(tmp_ALL_df))
colnames(tmp_ALL_df)[ix]
## [1] "Myeloid.5.Score" "NkT.6.Score" "NkT.1.4.Score" "NkT.3.1.Score"
## [5] "BEry.1.Score" "BEry.2.Score" "BEry.3.Score" "Plasma.4.Score"
x<-tmp_ALL_df[,ix]
for(i in 1:ncol(x)){x[,i]<-as.numeric(x[,i])}
pheatmap::pheatmap(cor(x))

tmp_df <- subcluster_comp_fulldf
tmp_df$Myeloid.5.Score <- get_median(p = "Myeloid.5",x = tmp_df)
tmp_df$Myeloid.5.Score <- factor(tmp_df$Myeloid.5.Score,levels = c('Low','High'))
tmp_df$NkT.6.Score <- get_median(p = "NkT.6",x = tmp_df)
tmp_df$NkT.6.Score <- factor(tmp_df$NkT.6.Score,levels = c('Low','High'))
tmp_df$NkT.1.4.Score <- get_median(p = "NkT.1.4",x = tmp_df)
tmp_df$NkT.1.4.Score <- factor(tmp_df$NkT.1.4.Score,levels = c('Low','High'))
tmp_df$NkT.3.1.Score <- get_median(p = "NkT.3.1",x = tmp_df)
tmp_df$NkT.3.1.Score <- factor(tmp_df$NkT.3.1.Score,levels = c('Low','High'))
tmp_df$BEry.1.Score <- get_median(p = "BEry.1",x = tmp_df)
tmp_df$BEry.1.Score <- factor(tmp_df$BEry.1.Score,levels = c('Low','High'))
tmp_df$BEry.2.Score <- get_median(p = "BEry.2",x = tmp_df)
tmp_df$BEry.2.Score <- factor(tmp_df$BEry.2.Score,levels = c('Low','High'))
tmp_df$BEry.3.Score <- get_median(p = "BEry.3",x = tmp_df)
tmp_df$BEry.3.Score <- factor(tmp_df$BEry.3.Score,levels = c('Low','High'))
tmp_df$Plasma.4[is.na(tmp_df$Plasma.4)] <- 0
tmp_df$Plasma.4.Score <- get_median(p = "Plasma.4",x = tmp_df)
tmp_df$Plasma.4.Score <- factor(tmp_df$Plasma.4.Score,levels = c('Low','High'))
tmp_df$Cytogenetic.Risk <- as.character(tmp_df$davies_based_risk)
tmp_df$Cytogenetic.Risk[tmp_df$Cytogenetic.Risk %in% 'standard_risk'] <- 'Standard'
tmp_df$Cytogenetic.Risk[tmp_df$Cytogenetic.Risk %in% 'high_risk'] <- 'High'
tmp_df$Cytogenetic.Risk <- factor(tmp_df$Cytogenetic.Risk,levels = c('Standard','High'))
tmp_df$Sex <- factor(tmp_df$d_pt_sex,levels = c('female','male'))
tmp_df$ISS.Stage <- tmp_df$d_dx_amm_iss_stage
tmp_df$Age <- tmp_df$d_dx_amm_age
table(tmp_df$progression_group,tmp_df$davies_based_risk)
##
## high_risk standard_risk
## Inc 19 20
## NP 32 40
## P 32 29
## RP 39 19
table(tmp_df$Myeloid.5.Score,tmp_df$NkT.6.Score)
##
## Low High
## Low 63 52
## High 52 63
table(tmp_df$Myeloid.5.Score,tmp_df$davies_based_risk)
##
## high_risk standard_risk
## Low 56 59
## High 66 49
table(tmp_df$Myeloid.5.Score,tmp_df$NkT.1.4.Score)
##
## Low High
## Low 61 54
## High 54 61
table(tmp_df$Myeloid.5.Score,tmp_df$NkT.3.1.Score)
##
## Low High
## Low 59 56
## High 56 59
table(tmp_df$NkT.1.4.Score,tmp_df$NkT.3.1.Score)
##
## Low High
## Low 59 56
## High 56 59
table(tmp_df$NkT.1.4.Score,tmp_df$BEry.1.Score)
##
## Low High
## Low 60 55
## High 55 60
table(tmp_df$NkT.1.4.Score,tmp_df$BEry.2.Score)
##
## Low High
## Low 63 52
## High 52 63
table(tmp_df$NkT.1.4.Score,tmp_df$BEry.3.Score)
##
## Low High
## Low 54 61
## High 61 54
ix<-grep('Score',colnames(tmp_df))
colnames(tmp_df)[ix]
## [1] "Myeloid.5.Score" "NkT.6.Score" "NkT.1.4.Score" "NkT.3.1.Score"
## [5] "BEry.1.Score" "BEry.2.Score" "BEry.3.Score" "Plasma.4.Score"
x<-tmp_df[,ix]
for(i in 1:ncol(x)){x[,i]<-as.numeric(x[,i])}
pheatmap::pheatmap(cor(x))

PFS COX
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ davies_based_risk + d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + NkT.3.1.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + NkT.1.4.Score + NkT.3.1.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + NkT.1.4.Score + NkT.3.1.Score +
BEry.1.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + NkT.1.4.Score + NkT.3.1.Score +
BEry.1.Score + BEry.2.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + NkT.1.4.Score + NkT.3.1.Score +
BEry.1.Score + BEry.2.Score + BEry.3.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + Myeloid.5.Score + NkT.1.4.Score + NkT.3.1.Score +
BEry.1.Score + BEry.2.Score + BEry.3.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + Myeloid.5.Score + NkT.1.4.Score + NkT.3.1.Score +
BEry.1.Score + BEry.2.Score + BEry.3.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + Myeloid.5.Score + NkT.1.4.Score + NkT.3.1.Score +
BEry.1.Score + BEry.2.Score + BEry.3.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ NkT.6.Score + Myeloid.5.Score + NkT.1.4.Score + NkT.3.1.Score +
BEry.1.Score + BEry.2.Score + BEry.3.Score + Plasma.4.Score +
d_pt_sex + d_dx_amm_iss_stage + d_dx_amm_age + site + batch, data = tmp_df) )

tmp_df_V2 <- tmp_df[,which(colnames(tmp_df) %in% c("BEry.1.Score","BEry.2.Score","BEry.3.Score","Myeloid.5.Score",
"NkT.1.4.Score","NkT.3.1.Score","NkT.6.Score","Plasma.4.Score",
'censpfs','Cytogenetic.Risk','Sex','ISS.Stage','Age'))]
ddist <- datadist(tmp_df_V2)
options(datadist='ddist')
mod.bi <- lrm(censpfs ~ Myeloid.5.Score + NkT.6.Score + NkT.1.4.Score + NkT.3.1.Score + BEry.1.Score + BEry.2.Score + BEry.3.Score + Plasma.4.Score +
Cytogenetic.Risk + ISS.Stage + Age + Sex, tmp_df_V2, x=TRUE)
nom.bi <- nomogram(mod.bi,fun = plogis,funlabel="Risk of Progression")
#pdf(file="risk.nonogram.pdf",width = 10,height = 10)
#plot(nom.bi,col.grid = gray(c(0.8, 0.95)))
#dev.off()
#pdf(file="/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/myeloid12.progressio.risk.nonogram.pdf",width = 7.5,height = 5)
#plot(nom.bi,col.grid = gray(c(0.8, 0.95)))
#dev.off()
#Predict(mod.bi)
tmp_df$pred <- predict(object = mod.bi, tmp_df_V2[,-1],se.fit = TRUE,type="fitted.ind")
tmp_df$pred[tmp_df$pred > 0.6] <- 'Integrated High Risk'
tmp_df$pred[!tmp_df$pred %in% 'Integrated High Risk'] <- 'Integrated Low Risk'
### PFS COX
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ pred + d_pt_sex + site + batch, data = tmp_df) )

#pdf(file="/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/integrated.cytogenetic.myeloid12.progression.risk.survival.pdf",width = 6,height = 6)
ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df),
type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "n",pval.method = TRUE,
xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('darkgreen','purple'))

#dev.off()
iscore <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
mod.bi <- lrm(censpfs ~ NkT.6.Score + NkT.1.4.Score + NkT.3.1.Score + BEry.1.Score + BEry.2.Score + BEry.3.Score +
Cytogenetic.Risk + Age + Sex, tmp_df_V2, x=TRUE)
tmp_df$pred <- predict(object = mod.bi, tmp_df_V2[,-1],se.fit = TRUE,type="fitted.ind")
tmp_df$pred[tmp_df$pred > 0.6] <- 'Integrated High Risk'
tmp_df$pred[!tmp_df$pred %in% 'Integrated High Risk'] <- 'Integrated Low Risk'
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ pred + d_pt_sex + site + batch, data = tmp_df) )

ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df),
type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "n",pval.method = TRUE,
xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('darkgreen','purple'))

iscore_noISS <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
mod.bi <- lrm(censpfs ~ Cytogenetic.Risk + Age + Sex, tmp_df_V2, x=TRUE)
tmp_df$pred <- predict(object = mod.bi, tmp_df_V2[,-1],se.fit = TRUE,type="fitted.ind")
tmp_df$pred[tmp_df$pred > 0.6] <- 'Integrated High Risk'
tmp_df$pred[!tmp_df$pred %in% 'Integrated High Risk'] <- 'Integrated Low Risk'
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ pred + d_pt_sex + site + batch, data = tmp_df) )

ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df),
type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "n",pval.method = TRUE,
xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('darkgreen','purple'))

iscore_noISS_oly_cyto <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
mod.bi <- lrm(censpfs ~ Cytogenetic.Risk + Age + Sex, tmp_df_V2, x=TRUE)
tmp_df$pred <- predict(object = mod.bi, tmp_df_V2[,-1],se.fit = TRUE,type="fitted.ind")
tmp_df$pred[tmp_df$pred > 0.6] <- 'Integrated High Risk'
tmp_df$pred[!tmp_df$pred %in% 'Integrated High Risk'] <- 'Integrated Low Risk'
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ pred + d_pt_sex + site + batch, data = tmp_df) )

ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df),
type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "n",pval.method = TRUE,
xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('darkgreen','purple'))

iscore_noISS_oly_cyto <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
mod.bi <- lrm(censpfs ~ NkT.6.Score + NkT.1.4.Score + NkT.3.1.Score + BEry.1.Score + BEry.2.Score + BEry.3.Score + Plasma.4.Score + Age + Sex, tmp_df_V2, x=TRUE)
tmp_df$pred <- predict(object = mod.bi, tmp_df_V2[,-1],se.fit = TRUE,type="fitted.ind")
tmp_df$pred[tmp_df$pred > 0.6] <- 'Integrated High Risk'
tmp_df$pred[!tmp_df$pred %in% 'Integrated High Risk'] <- 'Integrated Low Risk'
forest_model( coxph(Surv(ttcpfs, censpfs==1) ~ pred + d_pt_sex + site + batch, data = tmp_df) )

ggsurvplot( fit = survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df),
type=c("kaplan-meier"),surv.median.line = 'hv',log.rank.weights= "n",pval.method = TRUE,
xlab = "Days", ylab = "PFS Prob.",pval = TRUE,risk.table = TRUE,palette = c('darkgreen','purple'))

iscore_noISS_oly_I <- surv_pvalue( survfit(Surv(ttcpfs, censpfs==1) ~ pred, data = tmp_df, type=c("kaplan-meier")) , method = "survdiff")
iscore
## variable pval method pval.txt
## 1 pred 7.669755e-07 Log-rank p < 0.0001
iscore_noISS
## variable pval method pval.txt
## 1 pred 0.0002451338 Log-rank p = 0.00025
iscore_noISS_oly_cyto
## variable pval method pval.txt
## 1 pred 0.0003213409 Log-rank p = 0.00032
iscore_noISS_oly_I
## variable pval method pval.txt
## 1 pred 0.002422889 Log-rank p = 0.0024